# Count the negative entries of a numeric vector.
#
# Args:
#   vector: numeric vector; may be empty. NA entries are ignored.
#
# Returns:
#   The number of elements strictly below zero (an integer count). The value
#   is printed and, because print() returns its argument invisibly, is also
#   the function's return value.
count_neg <- function(vector) {
  # Vectorised comparison instead of an element-by-element loop.
  # na.rm = TRUE skips NA entries; a scalar `if (NA < 0)` test would stop
  # with "missing value where TRUE/FALSE needed".
  neg_numbers <- sum(vector < 0, na.rm = TRUE)
  print(neg_numbers)
}
# Spot checks for count_neg(): vectors with zero, one, and two negative
# entries. Lines starting with `##` are the printed results.
count_neg(c(1,2,3,4))
## [1] 0
count_neg(c(1,2,-3,4))
## [1] 1
count_neg(c(-1,2,-3,4))
## [1] 2
# Euclidean length (L2 norm) of a numeric vector.
#
# Args:
#   x: numeric vector.
#
# Returns:
#   sqrt(sum(x^2)), printed and returned invisibly via print(). For a
#   zero-length input nothing is printed and NULL is returned invisibly.
vlen <- function(x) {
  # Guard clause: nothing to measure for an empty vector.
  if (length(x) == 0) {
    return(invisible(NULL))
  }
  sum_of_squares <- sum(x^2)
  print(sqrt(sum_of_squares))
}
# Spot checks for vlen(): two length-one vectors and the 3-4-5 triangle.
# Lines starting with `##` are the printed results.
vlen(1)
## [1] 1
vlen(2)
## [1] 2
vlen(c(3,4))
## [1] 5
# Print the "bottles of beer" song, counting down from `beer` bottles to 1.
#
# Args:
#   beer: number of bottles to start from. Values below 1 print nothing.
#
# Returns:
#   NULL, invisibly; the function is called for its printed output.
#
# After the one-bottle verse the closing "No more bottles" verse is printed.
# Its last line always says "99 bottles", whatever the starting count.
beer <- function(beer) {
  # `beer:1` counts *upwards* when beer < 1 (0:1 is c(0, 1)), which would
  # sing the one-bottle verse for a wall that never had a bottle on it.
  if (beer < 1) {
    return(invisible(NULL))
  }
  for (i in beer:1) {
    if (i > 2) {
      # General verse: plural both before and after taking one down.
      cat(i, "bottles of beer on the wall,", i, "bottles of beer.", "\n")
      cat("Take one down and pass it around,", (i - 1), "bottles of beer on the wall.", "\n\n")
    } else if (i == 2) {
      # Two bottles left: the remainder is the singular "1 bottle".
      cat("2 bottles of beer on the wall, 2 bottles of beer.", "\n")
      cat("Take one down and pass it around, 1 bottle of beer on the wall.", "\n\n")
    } else if (i == 1) {
      # Last bottle, followed by the closing verse.
      cat("1 bottle of beer on the wall, 1 bottle of beer.", "\n")
      cat("Take one down and pass it around, no more bottles of beer on the wall.", "\n\n")
      cat("No more bottles of beer on the wall, no more bottles of beer.", "\n")
      cat("Go to the store and buy some more, 99 bottles of beer on the wall.", "\n\n")
    }
  }
}
# Sing the song starting from five bottles. Lines starting with `##` are the
# printed output.
beer(5)
## 5 bottles of beer on the wall, 5 bottles of beer.
## Take one down and pass it around, 4 bottles of beer on the wall.
##
## 4 bottles of beer on the wall, 4 bottles of beer.
## Take one down and pass it around, 3 bottles of beer on the wall.
##
## 3 bottles of beer on the wall, 3 bottles of beer.
## Take one down and pass it around, 2 bottles of beer on the wall.
##
## 2 bottles of beer on the wall, 2 bottles of beer.
## Take one down and pass it around, 1 bottle of beer on the wall.
##
## 1 bottle of beer on the wall, 1 bottle of beer.
## Take one down and pass it around, no more bottles of beer on the wall.
##
## No more bottles of beer on the wall, no more bottles of beer.
## Go to the store and buy some more, 99 bottles of beer on the wall.
# Pearson correlation coefficient of two equal-length numeric vectors,
# computed from the raw-sum formula
#   (n*Sxy - Sx*Sy) / sqrt((n*Sxx - Sx^2) * (n*Syy - Sy^2)).
#
# Args:
#   x, y: numeric vectors of the same length.
#
# Returns:
#   The correlation, printed and returned invisibly via print().
#
# Raises:
#   An error if `x` and `y` differ in length (R would otherwise recycle the
#   shorter vector silently and return a meaningless value).
my_cor <- function(x, y) {
  stopifnot(length(x) == length(y))
  # Work in double precision: with integer inputs, `x * y` and
  # `sum(x) * sum(y)` are integer arithmetic and overflow to NA once a
  # product exceeds .Machine$integer.max.
  x <- as.numeric(x)
  y <- as.numeric(y)
  n <- length(x)
  numerator <- n * sum(x * y) - sum(x) * sum(y)
  denominator <- sqrt(
    (n * sum(x^2) - sum(x)^2) * (n * sum(y^2) - sum(y)^2)
  )
  print(numerator / denominator)
}
# Spot checks for my_cor(): identical sequences, a reversed sequence, and a
# symmetric sequence against its own squares. Lines starting with `##` are
# the printed results.
my_cor(1:5, 1:5)
## [1] 1
my_cor(3:1, 1:3)
## [1] -1
my_cor(seq(-5, 5), seq(-5, 5)^2)
## [1] 0
# Load the `tli` data set, average `tlimth` within each grade, and keep the
# grade with the highest average.
data("tli")
tli |>
  group_by(grade) |>
  summarise(avg_score = mean(tlimth)) |>
  slice_max(avg_score, n = 1)
## # A tibble: 1 × 2
## grade avg_score
## <int> <dbl>
## 1 6 82.3
Grade 6 has the highest average exam score, at 82.3.
# Average `tlimth` by `disadvg`, plus the gap between the two averages.
# diff() returns a single value here (second row minus first), which
# mutate() recycles onto both rows. The column keeps the name
# `diff(avg_score)` that it would get if left unnamed.
tli |>
  group_by(disadvg) |>
  summarise(avg_score = mean(tlimth)) |>
  mutate(`diff(avg_score)` = diff(avg_score))
## # A tibble: 2 × 3
## disadvg avg_score `diff(avg_score)`
## <fct> <dbl> <dbl>
## 1 NO 78.1 -4.75
## 2 YES 73.3 -4.75
# Average `tlimth` for every grade/sex pair, then the within-grade gap.
# summarise() drops only the last grouping level (`sex`), so the result is
# still grouped by `grade` and diff() runs once per grade: the M row minus
# the F row, repeated on both rows of that grade.
tli |>
  group_by(grade, sex) |>
  summarise(avg_score = mean(tlimth)) |>
  mutate(difference = diff(avg_score))
## `summarise()` has grouped output by 'grade'. You can override using the
## `.groups` argument.
## # A tibble: 12 × 4
## # Groups: grade [6]
## grade sex avg_score difference
## <int> <fct> <dbl> <dbl>
## 1 3 F 67 4
## 2 3 M 71 4
## 3 4 F 74.2 2.28
## 4 4 M 76.5 2.28
## 5 5 F 69.2 12.6
## 6 5 M 81.9 12.6
## 7 6 F 84.6 -4.55
## 8 6 M 80.1 -4.55
## 9 7 F 78.3 5.58
## 10 7 M 83.9 5.58
## 11 8 F 73.6 -4.43
## 12 8 M 69.1 -4.43
Grade 5 has the largest difference in average test performance by gender.
# Number of students of each sex within each `disadvg` group.
tli |>
  group_by(disadvg) |>
  count(sex)
## # A tibble: 4 × 3
## # Groups: disadvg [2]
## disadvg sex n
## <fct> <fct> <int>
## 1 NO F 36
## 2 NO M 29
## 3 YES F 15
## 4 YES M 20
The gender difference is larger for students not from economically disadvantaged backgrounds.
# Number of students of each sex within each ethnic group. A combination
# with no students (OTHER / M) has no row in the result.
tli |>
  group_by(ethnicty) |>
  count(sex)
## # A tibble: 7 × 3
## # Groups: ethnicty [4]
## ethnicty sex n
## <fct> <fct> <int>
## 1 BLACK F 11
## 2 BLACK M 12
## 3 HISPANIC F 8
## 4 HISPANIC M 12
## 5 OTHER F 2
## 6 WHITE F 30
## 7 WHITE M 25
The Black ethnic group exhibits the smallest gender gap. I do not trust the results because the sample sizes vary widely across ethnic groups.
# Group sizes for each demographic variable: sex, grade, ethnicity, and
# economic disadvantage.
count(tli, sex)
## sex n
## 1 F 51
## 2 M 49
count(tli, grade)
## grade n
## 1 3 15
## 2 4 15
## 3 5 15
## 4 6 23
## 5 7 18
## 6 8 14
count(tli, ethnicty)
## ethnicty n
## 1 BLACK 23
## 2 HISPANIC 20
## 3 OTHER 2
## 4 WHITE 55
count(tli, disadvg)
## disadvg n
## 1 NO 65
## 2 YES 35
I would not trust any conclusions because the number of students varies across grades, the ethnic groups are very unevenly represented, and students from economically disadvantaged backgrounds are outnumbered by those who are not.
# Exploratory plots of the `diamonds` data set.
data("diamonds")

# Price against carat, one panel per cut.
ggplot(diamonds, aes(carat, price)) +
  geom_point() +
  facet_wrap(~ cut) +
  theme_bw()

# Same scatter with one smoothed trend line per diamond colour (no
# confidence band) and the legend moved below the panels.
ggplot(diamonds, aes(carat, price)) +
  geom_point() +
  geom_smooth(aes(color = color), se = FALSE) +
  facet_wrap(~ cut) +
  theme_bw() +
  theme(legend.position = "bottom")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Frequency polygon of price, coloured by cut and split into one panel per cut.
ggplot(diamonds, aes(price, color = cut)) +
  geom_freqpoly() +
  facet_wrap(~ cut) +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Scatter of two categorical variables.
ggplot(diamonds, aes(color, clarity)) +
  geom_point()
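This is not a useful plot because no clear pattern is visible. Color and clarity are both categorical variables, so every observation lands on one of a small grid of points and the plot cannot show how many diamonds fall into each combination.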
# Mosaic plot of diamond colour, with each bar filled by clarity.
# The warnings below are deprecation notices printed while the plot is drawn.
ggplot(data = diamonds) +
  geom_mosaic(
    mapping = aes(x = product(color), fill = clarity)
  ) +
  theme_bw()
## Warning: The `scale_name` argument of `continuous_scale()` is deprecated as of ggplot2
## 3.5.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `trans` argument of `continuous_scale()` is deprecated as of ggplot2 3.5.0.
## ℹ Please use the `transform` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `unite_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `unite()` instead.
## ℹ The deprecated feature was likely used in the ggmosaic package.
## Please report the issue at <https://github.com/haleyjeppson/ggmosaic>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Gapminder: life expectancy against GDP per capita, coloured by continent
# and sized by log population.
data("gapminder")

# Snapshot for 1952.
year_1952 <- gapminder |>
  filter(year == 1952)
ggplot(
  year_1952,
  aes(x = gdpPercap, y = lifeExp, color = continent, size = log(pop))
) +
  geom_point()

# Same snapshot with Kuwait excluded.
year_1952new <- year_1952 |>
  filter(country != "Kuwait")
ggplot(
  year_1952new,
  aes(x = gdpPercap, y = lifeExp, color = continent, size = log(pop))
) +
  geom_point()

# All years without Kuwait, animated over `year`.
gapminder_new <- gapminder |>
  filter(country != "Kuwait")
ggplot(
  gapminder_new,
  aes(x = gdpPercap, y = lifeExp, color = continent, size = log(pop))
) +
  geom_point() +
  transition_time(year)

# Animated version with a title showing the current frame's year, axis
# labels, the legend at the bottom, and the size legend hidden.
ggplot(
  gapminder_new,
  aes(x = gdpPercap, y = lifeExp, color = continent, size = (log(pop)))
) +
  geom_point() +
  transition_time(year) +
  theme_bw() +
  ggtitle("Life Expectancy VS GDP ({frame_time})") +
  ylab("Life Expectancy") +
  xlab("GDP Per Capita") +
  theme(legend.position = "bottom") +
  guides(size = "none")

# Same animation with a hand-picked colour for each continent that is named.
ggplot(
  gapminder_new,
  aes(x = gdpPercap, y = lifeExp, color = continent, size = (log(pop)))
) +
  geom_point() +
  transition_time(year) +
  theme_bw() +
  ggtitle("Life Expectancy VS GDP ({frame_time})") +
  ylab("Life Expectancy") +
  xlab("GDP Per Capita") +
  theme(legend.position = "bottom") +
  guides(size = "none") +
  scale_color_manual(
    values = c(
      "Europe" = "brown",
      "Asia" = "red",
      "Africa" = "blue",
      "Americas" = "yellow"
    )
  )
# Directory holding the council-district shapefile and the demographics CSV.
# File paths are built with file.path() instead of calling setwd() (twice),
# so the session's working directory is left untouched.
nycc_dir <- "/Users/monirulislam/Desktop/Weylandt HW/Weylandt HW3/nycc_24a"

# Council district boundaries.
nycc <- read_sf(file.path(nycc_dir, "nycc.shp"))
ggplot(nycc) +
  geom_sf()

# Demographics, joined to the boundaries: `district` in the CSV matches
# `CounDist` in the shapefile.
nyc_demos <- read.csv(file.path(nycc_dir, "nyc_demos.csv"))
nycc_demos <- inner_join(nyc_demos, nycc, join_by(district == CounDist))

# Population rows for children under 5, without the Y2000 column.
Under5 <- nycc_demos |>
  filter(variable == "Population", field == "Under 5 years") |>
  select(-Y2000)

# Map of the Y2010 under-5 population. The geometry column is mapped
# explicitly -- presumably because the joined table is a plain data frame
# rather than an sf object; confirm before removing.
ggplot(Under5) +
  geom_sf(aes(fill = Y2010, geometry = geometry)) +
  scale_fill_viridis_b() +
  ggtitle("Population Under 5 Years By Council District") +
  theme_bw()
Parts of Manhattan and Queens appear to have the fewest young children, whereas Brooklyn has noticeably more.
# Adult (18 and over) population per council district in Y2010, compared
# with the average per district. No file is read here, so the former
# setwd() call was redundant and has been removed.
voters <- nycc_demos |>
  filter(field == "18 years and over") |>
  select(-Y2000) |>
  arrange(district) |>
  mutate(
    # Divide by the number of districts actually present instead of a
    # hard-coded 51.
    avg_voters = sum(Y2010) / n_distinct(district),
    difference = Y2010 - avg_voters
  )

# Map each district's deviation from the average.
ggplot(voters) +
  geom_sf(aes(fill = difference, geometry = geometry)) +
  scale_fill_viridis_b() +
  theme_bw() +
  ggtitle("Difference In Adult Population By Average Voters Per District")
Several districts in Brooklyn, including 40, 41, 44, and 45, are significantly under-represented, whereas Manhattan districts, including 1, 2, and 3, are significantly over-represented.
# Share of each district's Y2010 residents in each race/ethnicity field.
# No file is read here, so the former setwd() call was redundant and has
# been removed.
race <- nycc_demos |>
  select(-Y2000) |>
  filter(
    field %in% c(
      "White Nonhispanic",
      "Black Nonhispanic",
      "Asian and Pacific Islander Nonhispanic",
      "Other Nonhispanic",
      "Two or More Races Nonhispanic",
      "Hispanic Origin"
    )
  ) |>
  group_by(district) |>
  # Percentage within the district: the denominator is the sum over the
  # selected fields for that district.
  mutate(percentage = (Y2010 / sum(Y2010)) * 100) |>
  # Drop the grouping so later operations on `race` are not silently
  # performed per district.
  ungroup() |>
  arrange(district)

# One map per field, with smaller strip text so the long labels fit.
ggplot(race) +
  geom_sf(aes(fill = percentage, geometry = geometry)) +
  scale_fill_viridis_b() +
  theme_bw() +
  facet_wrap(~ field) +
  theme(strip.text = element_text(size = 7)) +
  ggtitle("Percentage Distribution Of Residents By Race")
# Share of each district's Y2010 households in each selected housing field.
# No file is read here, so the former setwd() call was redundant and has
# been removed.
#
# NOTE(review): the selection mixes four owner-occupied age bands with a
# single renter-occupied household-type field ("Male householder, no wife
# present"). Confirm these are the intended categories for an owner/renter
# comparison by age.
Owner_Renter <- nycc_demos |>
  filter(
    field %in% c(
      "Householder Age - Owner occupied_15 to 24 years",
      "Householder Age - Owner occupied_25 to 44 years",
      "Householder Age - Owner occupied_45 to 64 years",
      "Householder Age - Owner occupied_65 years and over",
      "Renter occupied_Male householder, no wife present"
    )
  ) |>
  select(-Y2000) |>
  arrange(district) |>
  group_by(district) |>
  # Percentage within the district: the denominator is the sum over the
  # selected fields for that district.
  mutate(percentage = (Y2010 / sum(Y2010)) * 100) |>
  # Drop the grouping so later operations on `Owner_Renter` are not
  # silently performed per district.
  ungroup()

# One map per field, with small strip text so the long labels fit.
ggplot(Owner_Renter) +
  geom_sf(aes(fill = percentage, geometry = geometry)) +
  scale_fill_viridis_b() +
  facet_wrap(~ field) +
  theme_bw() +
  theme(strip.text = element_text(size = 5)) +
  ggtitle("Percentage Distribution Of Owner And Renter Housing Per District By Age")
Based on the visualization, and in line with expectations, the percentage of home ownership among householders aged 15 to 24 is low across all districts, because people at this age are often still in school, figuring out their lives, and simply do not have the financial means to own a home. There is a higher percentage of renters in the Brooklyn and Manhattan districts because asset prices there are generally significantly higher than in the other boroughs, which makes renting the better financial option.